home
***
CD-ROM
|
disk
|
FTP
|
other
***
search
/
Skunkware 5
/
Skunkware 5.iso
/
src
/
X11
/
wais
/
ir
/
irfiles.h
< prev
next >
Wrap
C/C++ Source or Header
|
1995-05-09
|
7KB
|
225 lines
/* WIDE AREA INFORMATION SERVER SOFTWARE:
No guarantees or restrictions. See the readme file for the full standard
disclaimer.
Brewster@think.com
*
* $Log: irfiles.h,v $
* Revision 1.19 92/04/16 20:04:44 morris
* small fix to dictionary_block_word_occurances: length read was
* NEXT_INDEX_BLOCK_SIZE, now it's NUMBER_OF_OCCURANCES_SIZE.
*
* Revision 1.18 92/03/19 09:34:08 morris
* fixed the dictionary header to accurately indicate the number of blocks
*
* Revision 1.17 92/02/17 12:38:00 jonathan
* Added defines for catalog.
*
*/
/* include file for irfiles.c */
#ifndef IRFILES_H
#define IRFILES_H
#include "cdialect.h"
#include "cutil.h"
#include "hash.h"
#include "ustubs.h" /* for time_t */
/* Filename extensions for the various component files of a database.
   Most have a matching *_filename() builder declared later in this header
   (dictionary_filename, filename_table_filename, headline_table_filename,
   document_table_filename, index_filename, source_filename); no builder
   for catalog_ext is declared in this header. */
#define dictionary_ext ".dct"
#define filename_table_ext ".fn"
#define headline_table_ext ".hl"
#define document_table_ext ".doc"
#define index_ext ".inv"
#define source_ext ".src"
#define catalog_ext ".cat"
/* these dictionary definitions are used in irhash, irverify, and irfiles */
#define DICTIONARY_HEADER_SIZE 4
#define DICTIONARY_BLOCK_SIZE 1000L /* in entries, not bytes */
#define DICTIONARY_ENTRY_HASH_CODE_SIZE 2
/* #define DICTIONARY_ENTRY_COUNT_SIZE 3 moved to inverted file */
/* #define DICTIONARY_ENTRY_INDEX_BLOCK_SIZE 4 not used and too long a symbol*/
/* #define DICTIONARY_ELEMENT_SIZE 6 was 9 */
#define DICTIONARY_SIZE 524288L
#define DICTIONARY_TOTAL_SIZE_WORD "{}" /* the word that holds the total number of words in the whole dictionary */
/* Index-file layout constants.  NOTE(review): the *_SIZE values are
   presumably field widths in bytes (NEXT_INDEX_BLOCK_SIZE and
   NUMBER_OF_OCCURANCES_SIZE are passed as the first argument of
   read_bytes_from_memory by the dictionary_block_* macros in this header)
   -- confirm against the implementation. */
#define INDEX_HEADER_SIZE 4
#define INDEX_BLOCK_SIZE_SIZE 2
#define NEXT_INDEX_BLOCK_SIZE 4
#define INDEX_BLOCK_FLAG_SIZE 1
/* 7 == INDEX_BLOCK_FLAG_SIZE + NEXT_INDEX_BLOCK_SIZE + INDEX_BLOCK_SIZE_SIZE.
   NOTE(review): presumably meant to be that sum; the value is hard-coded,
   so it must be updated by hand if any of the three changes. */
#define INDEX_BLOCK_HEADER_SIZE 7
#define NUMBER_OF_OCCURANCES_SIZE 4
/* Index block flag values.  Numerically these are the ASCII codes for
   'e' (101), 'E' (69) and '{' (123). */
#define INDEX_BLOCK_NOT_FULL_FLAG 101
#define INDEX_BLOCK_FULL_FLAG 69
#define INDEX_BLOCK_DICTIONARY_FLAG 123
#define DOCUMENT_ID_SIZE 4
#define WORD_POSITION_SIZE 0
#define CHARACTER_POSITION_SIZE 3
#define WEIGHT_SIZE 1
/* 8 == DOCUMENT_ID_SIZE + WORD_POSITION_SIZE + CHARACTER_POSITION_SIZE
   + WEIGHT_SIZE.  NOTE(review): presumably meant to be that sum; the value
   is hard-coded. */
#define INDEX_ELEMENT_SIZE 8
#define WORD_ID_SIZE 4 /* for posting arrays */
/* Per-database state: a file name, one stdio stream for each component
   file, and the counters used while an index is being built.
   NOTE(review): the roles of fields without a comment below are not
   visible in this header -- confirm against the implementation. */
typedef struct database {
char* database_file;
/* One stream per component file (dictionary, filename table, headline
   table, document table, index). */
FILE* dictionary_stream;
FILE* filename_table_stream;
FILE* headline_table_stream;
FILE* document_table_stream;
FILE* index_stream;
long doc_table_allocated_entries;
hashtable* the_word_memory_hashtable;
/* For building.  Checked on every add_word; set at the start of building
   and on every flush. */
long number_of_words_in_hashtable;
/* Set at the start of building; compared with
   number_of_words_in_hashtable. */
long flush_after_n_words;
/* For building: number of different words.  Set from the headers of .inv
   files as they are merged.  It is used to set the header when a .inv file
   is first created (not by merging). */
long number_of_words;
long index_file_number; /* for building. */
/* Total number of word occurrences.  Set during indexing, saved in the
   dictionary under the 'ALL' entry.
   NOTE(review): DICTIONARY_TOTAL_SIZE_WORD in this header is "{}", not
   "ALL" -- confirm which word actually holds the total. */
long total_word_count;
void* ext_database;
} database;
/* One record of the document table; this is the type passed to
   read_document_table_entry() and write_document_table_entry().
   NOTE(review): filename_id and headline_id are presumably the long values
   returned by write_filename_table_entry() and
   write_headline_table_entry() -- confirm against the implementation. */
typedef struct document_table_entry {
long filename_id;
long headline_id;
long source_id; /* for signature system */
long start_character;
long end_character;
long document_length; /* in characters */
long number_of_lines; /* in lines */
time_t date; /* 0 if unknown */
} document_table_entry;
#ifdef __cplusplus
/* declare these as C style functions */
extern "C"
{
#endif /* def __cplusplus */
/* Database lifecycle.  This header is the include file for irfiles.c, so
   the definitions are presumably there; the notes below restate the
   signatures only -- confirm semantics against the implementation. */
/* Takes a database name and two flags (initialize, for_search); returns a
   database pointer.  NOTE(review): the failure return value is not visible
   here. */
database* openDatabase _AP((char* name, boolean initialize,boolean for_search));
/* NOTE(review): how closeDatabase and disposeDatabase differ (e.g. which
   one releases the_db itself) is not visible here -- confirm before use. */
void closeDatabase _AP((database* the_db));
void disposeDatabase _AP((database* the_db));
void initialize_index_files _AP((database* db));
/* Filename table access.  NOTE(review): semantics are defined in the
   implementation; the notes below restate the signatures and flag points
   to confirm. */
/* Takes a table position plus filename, type and file_write_date
   pointers.  NOTE(review): these three are presumably outputs; required
   buffer sizes and the meaning of the returned char* are not visible
   here. */
char *read_filename_table_entry _AP((long position,
char* filename,
char* type,
time_t* file_write_date,
database* db));
/* Takes a filename and a type string; returns a long.  NOTE(review):
   presumably the position of the new entry -- confirm. */
long write_filename_table_entry _AP((char* filename, char *type, database* db));
/* Membership tests keyed by filename: the first takes a database, the
   second takes the name of a filename file directly. */
boolean filename_in_database _AP((char *filename, char *type,
time_t *write_file_date, database *db));
boolean filename_in_filename_file _AP ((char *filename, char*type,
time_t *file_write_date,
char* filename_file));
/* Headline table access: read by position, write by headline text.
   NOTE(review): ownership/lifetime of the char* returned by the reader and
   the meaning of the writer's long return value are not visible here --
   confirm against the implementation. */
char *read_headline_table_entry _AP((long position,database* db));
long write_headline_table_entry _AP((char* headline, database* db));
/* Document table access.  NOTE(review): semantics are defined in the
   implementation; the notes below restate the signatures only. */
/* Takes a document_table_entry pointer and an entry number; returns a
   boolean.  NOTE(review): doc_entry is presumably filled in on success. */
boolean read_document_table_entry
_AP((document_table_entry* doc_entry,long number,database* db));
/* Takes a document_table_entry pointer; returns a long.  NOTE(review):
   meaning of the return value is not visible here. */
long write_document_table_entry
_AP((document_table_entry* doc_table_entry, database* db));
boolean writeUserValToDocIDTable _AP((unsigned long userVal,long doc,
database* db));
long next_document_id _AP((database* db));
/* Dictionary file maintenance.  NOTE(review): behaviour is defined in the
   implementation, not in this header; the remarks below restate the
   signatures and flag points to confirm. */
void close_dictionary_file _AP((database *db));
/* Takes a word together with an index-file block number and an occurrence
   count. */
long add_word_to_dictionary
_AP((char *word, long index_file_block_number, long number_of_occurances,
database* db));
/* word_id is passed by pointer -- presumably an output; confirm what the
   long return value encodes. */
long look_up_word_in_dictionary _AP((char *word, long *word_id, database* db));
long init_dict_file_for_writing _AP((database *db));
void init_dict_file_detailed _AP((FILE* dictionary_stream,
long number_of_blocks));
/* NOTE(review): the function name says "blocks" but the parameter is named
   number_of_words -- confirm which count callers are expected to pass. */
void record_num_blocks_in_dict _AP((FILE* dictionary_stream,
long number_of_words));
long finished_add_word_to_dictionary _AP((database *db));
/* Takes a single file name and returns a boolean.  NOTE(review): presumably
   the name of a source-description (source_ext) file; what "register"
   entails is not visible in this header -- confirm. */
boolean register_src_structure _AP((char *filename));
/* Takes the pieces of a source description -- an output file name, the
   database name, a type name, a list of file names with its count, an
   export flag and a TCP port -- and returns a boolean.
   NOTE(review): the exact semantics (what is written, what the return
   value signals) are defined in the implementation -- confirm there.

   The third parameter is spelled type_name rather than typename because
   `typename` is a reserved word in C++, and this header is meant to be
   includable from C++ (see the extern "C" wrapper around these
   declarations).  Prototype parameter names are not part of the
   interface, so callers and the definition are unaffected. */
boolean write_src_structure _AP((char *filename,
                                 char *database_name,
                                 char *type_name,
                                 char **filenames,
                                 long number_of_filename,
                                 boolean export_database,
                                 long tcp_port));
/* NOTE(review): semantics of the four declarations below are defined in
   the implementation; the remarks restate the signatures only. */
boolean build_catalog _AP((database* db));
/* Takes a requested size and a stream; returns a long.  NOTE(review):
   presumably the position of the allocated block -- confirm. */
long allocate_index_block _AP((long how_large, FILE* stream));
/* Takes a caller-supplied block buffer, a position and a length, plus the
   stream to read from.  NOTE(review): whether the returned pointer is
   `block` itself, and what is returned on failure, is not visible here. */
unsigned char *read_dictionary_block _AP((unsigned char* block,
long position,long length,
FILE* stream));
void print_dictionary _AP((database* db));
/* Bytes occupied by one entry of a dictionary block: the sum of
   MAX_WORD_LENGTH, 1 (for the '\0'), NEXT_INDEX_BLOCK_SIZE and
   NUMBER_OF_OCCURANCES_SIZE. */
#define DICTIONARY_ENTRY_SIZE 29

/* Accessors for entry number `i` of the in-memory dictionary block `block`:
     dictionary_block_word            - pointer to the start of the entry,
                                        viewed as a char* (the word)
     dictionary_block_position        - NEXT_INDEX_BLOCK_SIZE bytes read at
                                        offset MAX_WORD_LENGTH + 1
     dictionary_block_word_occurances - NUMBER_OF_OCCURANCES_SIZE bytes read
                                        immediately after the position field
   They are declared as functions when DICT_FUNC is defined and provided as
   macros otherwise. */
#ifdef DICT_FUNC
char *dictionary_block_word _AP((long i, unsigned char *block));
long dictionary_block_position _AP((long i, unsigned char *block));
long dictionary_block_word_occurances _AP((long i, unsigned char *block));
#else /* macro versions */
#define dictionary_block_word(i,block) \
  ((char *)((block) + (DICTIONARY_ENTRY_SIZE * (i))))

#define dictionary_block_position(i,block) \
  read_bytes_from_memory(NEXT_INDEX_BLOCK_SIZE, \
    (block) + (DICTIONARY_ENTRY_SIZE * (i)) + MAX_WORD_LENGTH + 1)

#define dictionary_block_word_occurances(i,block) \
  read_bytes_from_memory(NUMBER_OF_OCCURANCES_SIZE, \
    (block) + (DICTIONARY_ENTRY_SIZE * (i)) + MAX_WORD_LENGTH + 1 \
            + NEXT_INDEX_BLOCK_SIZE)
#endif
/* Takes a dictionary block and a size.  NOTE(review): the unit of `size`
   (entries or bytes) is not visible here -- confirm. */
void print_dictionary_block _AP((unsigned char* block,long size));
/* database functions */
/* Each *_filename function takes a caller-supplied destination buffer and a
   database and returns a char*.  NOTE(review): presumably each writes the
   component file's name (database name plus the corresponding *_ext
   extension) into destination and returns it; the required buffer size is
   not visible here -- confirm against the implementation. */
char* dictionary_filename _AP((char* destination, database* db));
char* filename_table_filename _AP((char* destination, database* db));
char* headline_table_filename _AP((char* destination, database* db));
char* document_table_filename _AP((char* destination, database* db));
char* index_filename _AP((char* destination, database* db));
/* Same shape as index_filename, with an additional version number. */
char* index_filename_with_version _AP((long version, char* destination,
database* db));
char* source_filename _AP((char* destination, database* db));
#ifdef __cplusplus
}
#endif /* def __cplusplus */
#endif /* IRFILES_H */